در اینجا پردازش های اولیه و توابع پراستفاده را می نویسیم. با استفاده از داده های زلزله ها در ایران و جهان به سوالات زیر پاسخ دهید.
library(ggthemes)
library(ggplot2)
library(readr)
library(ggmap)
library(plyr)
library(dplyr)
library(stringr)
library(highcharter)
library(countrycode)
library(knitr)
# Working directory is assumed to be the knit root.
# setwd("~/Desktop/96-97-2/Data Analysis/HW/HW11/")

# Load every dataset used in the analysis below.
# NOTE(review): paths are relative to the project layout — confirm they exist.
historical_eq <- read_rds(path = "../data_eq/historical_web_data_26112015.rds")
disaster <- read_delim("../data_eq/disaster.txt", "\t",
                       escape_double = FALSE, trim_ws = TRUE)
world <- map_data("world")
iequake <- read_rds("../data_eq/iran_earthquake.rds")
myMap <- read_rds("../data_eq/Tehrn_map_6.rds")
ww <- read_csv("../data_eq/worldwide.csv")
# Build a ggplot of the world map with polygons that cross the recentring
# meridian regrouped and closed, so they render without horizontal streaks.
# Returns the ggplot object so the caller can add further layers.
# NOTE(review): theme_Publication() is defined elsewhere in the project —
# confirm it is in scope before this function is called.
get_world_map <- function() {
library(maps)
library(ggplot2)
library(sp)
library(ggmap)
###################################################################################################
# Recentre worldmap (and Mirrors coordinates) on longitude 160
### Code by Claudia Engel March 19, 2012, www.stanford.edu/~cengel/blog
### Recenter ####
center <- 0 # positive values only; 0 keeps the conventional Greenwich centre
# shift coordinates to recenter worldmap
worldmap <- map_data ("world")
worldmap$long.recenter <- ifelse(worldmap$long < center - 180 , worldmap$long + 360, worldmap$long)
### Function to regroup split lines and polygons
# Takes dataframe, column with long and unique group variable, returns df with added column named group.regroup
RegroupElements <- function(df, longcol, idcol){
g <- rep(1, length(df[,longcol]))
if (diff(range(df[,longcol])) > 300) { # check if longitude within group differs more than 300 deg, ie if element was split
d <- df[,longcol] > mean(range(df[,longcol])) # we use the mean to help us separate the extreme values
g[!d] <- 1 # some marker for parts that stay in place (we cheat here a little, as we do not take into account concave polygons)
g[d] <- 2 # parts that are moved
}
g <- paste(df[, idcol], g, sep=".") # attach to id to create unique group variable for the dataset
df$group.regroup <- g
df
}
### Function to close regrouped polygons
# Takes dataframe, checks if 1st and last longitude value are the same, if not, inserts first as last and reassigns order variable
ClosePolygons <- function(df, longcol, ordercol){
if (df[1,longcol] != df[nrow(df),longcol]) {
tmp <- df[1,]
df <- rbind(df,tmp)
}
o <- c(1: nrow(df)) # reassign the order variable
df[,ordercol] <- o
df
}
# now regroup (ddply comes from plyr, loaded at the top of the file)
worldmap.rg <- ddply(worldmap, .(group), RegroupElements, "long.recenter", "group")
# close polys
worldmap.cp <- ddply(worldmap.rg, .(group.regroup), ClosePolygons, "long.recenter", "order") # use the new grouping var
#############################################################################
return(ggplot(aes(x = long.recenter, y = lat), data = worldmap.cp) +
geom_polygon(aes(group = group.regroup), fill="#f9f9f9", colour = "grey65") +
scale_y_continuous(limits = c(-60, 85)) +
coord_equal() + theme_Publication()+
theme(legend.position = "none",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks = element_blank(),
panel.border = element_rect(colour = "black")))
}
# Map geographic points to country names.
#
# points: a data.frame in which
#   - column 1 contains the longitude in degrees
#   - column 2 contains the latitude in degrees
# Returns the ADMIN country name for each point (NA when the point falls
# outside every country polygon). Requires rworldmap (getMap) and sp
# (SpatialPoints, over), loaded before the first call.
coords2country <- function(points)
{
  countriesSP <- getMap(resolution = 'low')
  # countriesSP <- getMap(resolution='high') # higher detail needs rworldxtra
  # Convert the points to a SpatialPoints object, reusing the CRS of the
  # country polygons so both layers share the same projection.
  # pointsSP = SpatialPoints(points, proj4string=CRS(" +proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs +towgs84=0,0,0"))
  pointsSP <- SpatialPoints(points, proj4string = CRS(proj4string(countriesSP)))
  # 'over' returns, for each point, the attribute row of the polygon containing it.
  indices <- over(pointsSP, countriesSP)
  # return the ADMIN names of each country
  indices$ADMIN
  # indices$ISO3      # returns the ISO3 code
  # indices$continent # returns the continent (6 continent model)
  # indices$REGION    # returns the continent (7 continent model)
}
# Extract a country name from a free-text "place" string such as
# "10km N of Tehran, Iran": take the right-most non-empty comma-separated
# part, then rewrite US state names (and "CA") to "United States of America".
place2country <- function(place) {
  place_parts <- place %>% str_trim() %>% str_split_fixed(",", 5)
  # Start from the last part and fall back leftwards while it is empty.
  result <- place_parts[, 5]
  for (i in 4:1) {
    result <- if_else(condition = (result == ""),
                      true = place_parts[, i],
                      false = result)
  }
  # State-name rewriting is done once, after the fallback loop (the original
  # re-ran it on every iteration with the same effect).
  # NOTE(review): str_replace matches substrings, so e.g. the US state
  # "Georgia" would also rewrite the country Georgia — confirm acceptable.
  for (cstate in c(state.name, "CA")) {
    result <- str_replace(result, cstate, "United States of America")
  }
  return(result)
}
۱. با استفاده از داده های historical_web_data_26112015.rds و استفاده از نمودار پراکنش سه بعدی بسته plotly نمودار طول، عرض و عمق زلزله ها را رسم نمایید. علاوه بر آن بزرگی هر نقطه را برابر بزرگی زمین لرزه قرار دهید.
library(plotly)
# 3-D scatter of earthquake longitude/latitude/depth; marker size and colour
# both encode magnitude, with hover text describing each event.
plot_ly(historical_eq, x = ~Longitude, y = ~Latitude, z = ~Depth, size = ~Magnitude,
marker = list( color = ~Magnitude, symbol = 'circle', sizemode = 'diameter', colorscale = c('#FFE1A1', '#683531'), showscale = TRUE), sizes = c(1, 30),
text = ~paste('Time:', Time, '<br>Magnitude:', Magnitude, '<br>Latitude:', Latitude,
'<br>Longitude:', Longitude, '<br>Depth:',Depth, '<br>Province:', Province,
'<br>City:', City)) %>%
layout(title = 'Earthquakes',
scene = list(xaxis = list(title = 'Longitude',
gridcolor = 'rgb(255, 255, 255)',
zerolinewidth = 1,
ticklen = 5,
gridwidth = 2),
yaxis = list(title = 'Latitude',
gridcolor = 'rgb(255, 255, 255)',
zerolinewidth = 1,
ticklen = 5,
gridwidth = 2),  # fixed typo: was 'gridwith'
zaxis = list(title = 'Depth',
gridcolor = 'rgb(255, 255, 255)',
zerolinewidth = 1,
ticklen = 5,
gridwidth = 2)),  # fixed typo: was 'gridwith'
paper_bgcolor = 'rgb(243, 243, 243)',
plot_bgcolor = 'rgb(243, 243, 243)')
۲. پویانمایی سونامی های تاریخی را بر حسب شدت بر روی نقشه زمین رسم نمایید.(از داده زلزله های بزرگ استفاده نمایید.)
تابع نوشته شده در زیر در ابتدا کار می کرد ولی پس از آپدیت پکیج دیگر کار نمی کرد، فایل گیف قبلی در کنار فایل اچ تی ام ال موجود است.
# Historical tsunami-flagged events with a known primary magnitude.
disaster_tsu <- disaster[disaster$FLAG_TSUNAMI == "Tsu", ] %>% filter(!is.na(EQ_PRIMARY))
# World map with one point per tsunami; `frame = YEAR` is the animation key.
q <- ggplot()+
geom_polygon(data = world, aes(x = long, y = lat, group = group),
fill = "white",
color = "lightblue")+
geom_point(data = disaster_tsu, aes(x = LONGITUDE,
y = LATITUDE,
size = EQ_PRIMARY,
color = EQ_PRIMARY,
frame = YEAR), alpha = 0.7)+
scale_colour_gradient(low = "pink", high = "darkred",
space = "Lab", na.value = "grey50", guide = "colourbar")+
ggtitle("Earthquakes")+
xlab("Longitude")+ylab("Latitude")+
coord_fixed(1.3)+
theme_solarized()+
guides(size = FALSE, color = FALSE)
### worked in previous versions but doesn't work now, the gif is attached anyway
#gganimate::gganimate(q, filename = "tsu.gif")
۳. نمودار چگالی دو بعدی زلزله های تاریخی ایران را رسم کنید.( از داده iran_earthquake.rds و لایه stat_density_2d استفاده نمایید).
# 2-D density of historical Iranian earthquakes over the Tehran base map,
# with the raw events drawn as faint points underneath the contours.
ggmap(myMap)+
geom_point(aes(x = Long, y = Lat), data = iequake,
alpha = .1, color="#fdb462", size = 0.2)+
stat_density_2d(aes(x = Long, y = Lat), color = "#386cb0", data = iequake) +
xlab("Longitude")+
ylab("Latitude" )+
ggtitle("Iran Earthquakes Density Map")
۴. احتمال اینکه در ایران در پنج سال آینده زلزله به بزرگی هفت ریشتر رخ دهد را محاسبه کنید. (از احتمال شرطی استفاده کنید.)
# Estimate P(an M>=7 earthquake hits Iran within the next 5 years) from the
# empirical distribution of gaps (in months) between past M>=7 events.

# M>=7 events from the Iranian catalogue, reduced to (year, month).
iequake_big <- iequake %>%
  select(time = OriginTime, mag = Mag) %>%
  as.data.frame() %>%
  filter(mag >= 7) %>%
  mutate(year = as.numeric(format(time,"%Y")), month = as.numeric(format(time,"%m"))) %>%
  select(-mag, -time)
# M>=7 Iranian events from the worldwide disaster catalogue.
dis_iran <- disaster %>%
  filter(COUNTRY == "IRAN") %>%
  select(year = YEAR, month = MONTH, mag = EQ_PRIMARY) %>%
  filter(mag >= 7) %>%
  as.data.frame() %>%
  select(-mag)
# Merge both sources, newest first, and compute the gap to the previous event.
# lead() leaves NA on the very oldest row only; that row is pre-1900 and is
# dropped by the filter — presumably, confirm against the merged data.
last_eqs <- union(dis_iran, iequake_big) %>%
  as.data.frame() %>%
  arrange(-year, -month) %>%
  mutate(month_from_before = month - lead(month) + 12 * (year - lead(year))) %>%
  filter(year >= 1900)
last_eqs[1, ] %>% kable()
# | year | month | month_from_before |
# |------|-------|-------------------|
# | 2017 |  11   |        25         |

# Months elapsed since the most recent M>=7 event.
months_past_from_last <- 12*(as.numeric(format(Sys.time(),"%Y"))- last_eqs[1,]$year) +
  as.numeric(format(Sys.time(),"%m")) - last_eqs[1,]$month
# Conditional on the current quiet spell: an event within 5 more years means
# a total inter-event gap of at most months_past_from_last + 60 months.
desired_distance <- months_past_from_last + 60
distances <- last_eqs$month_from_before
paste0("probability: ", round(sum(distances <= desired_distance) / sum(!is.na(distances)),2))
## [1] "probability: 0.59"
۵. بر اساس داده های زلزله های بزرگ ابتدا تعداد و متوسط کشته زلزله ها را بر حسب کشور استخراج نمایید. سپس نمودار گرمایی تعداد کشته ها را بر روی کره زمین رسم نمایید.(مانند مثال زیر!)
# Per-country total and mean death toll; COUNTRY is converted to an ISO2 code
# so it can be joined onto the highcharter world map.
countries_death_toll <- disaster %>%
  group_by(COUNTRY) %>%
  filter(!is.na(TOTAL_DEATHS)) %>%
  summarise(sum_death = sum(TOTAL_DEATHS), mean_death = round(mean(TOTAL_DEATHS),2)) %>%
  mutate(COUNTRY = countrycode(COUNTRY, "country.name", "iso2c" ))
# Choropleth of the total death toll.
hcmap("custom/world", data = countries_death_toll, value = "sum_death",
      joinBy = c("iso-a2","COUNTRY"), name = "Total Death Toll",
      dataLabels = list(enabled = TRUE, format = '{point.name}'),
      borderColor = "#FAFAFA", borderWidth = 0.1) %>%
  hc_mapNavigation(enabled = TRUE)
# Choropleth of the mean death toll (was fused onto the previous line).
hcmap("custom/world", data = countries_death_toll, value = "mean_death",
      joinBy = c("iso-a2","COUNTRY"), name = "Mean Death Toll",
      dataLabels = list(enabled = TRUE, format = '{point.name}'),
      borderColor = "#FAFAFA", borderWidth = 0.1) %>%
  hc_mapNavigation(enabled = TRUE)
۶. با استفاده از داده لرزه های بزرگ و به وسیله طول، عرض، شدت، عمق مدلی برای پیش بینی تعداد کشته های زلزله بیابید.
# Predict the death toll from longitude, latitude, magnitude and focal depth
# using a Poisson GLM on the large-earthquakes catalogue.
model_df <- disaster %>%
  select(LONGITUDE, LATITUDE, EQ_PRIMARY, FOCAL_DEPTH, TOTAL_DEATHS)
# NOTE(review): the residual deviance below vastly exceeds the degrees of
# freedom (overdispersion); a quasi-Poisson or negative binomial model may
# fit better — confirm.
model <- glm(TOTAL_DEATHS ~ ., data = model_df, family = "poisson")
model %>% summary()
##
## Call:
## glm(formula = TOTAL_DEATHS ~ ., family = "poisson", data = model_df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -384.38 -61.46 -39.20 -21.18 1521.82
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.389e+00 5.064e-03 -274.3 <2e-16 ***
## LONGITUDE 8.578e-04 6.032e-06 142.2 <2e-16 ***
## LATITUDE 1.973e-02 2.745e-05 718.9 <2e-16 ***
## EQ_PRIMARY 1.348e+00 6.850e-04 1968.1 <2e-16 ***
## FOCAL_DEPTH -2.631e-02 4.480e-05 -587.3 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 18100341 on 1051 degrees of freedom
## Residual deviance: 13160953 on 1047 degrees of freedom
## (4974 observations deleted due to missingness)
## AIC: 13166049
##
## Number of Fisher Scoring iterations: 8
۷. با استفاده از داده worldwide.csv به چند سوال زیر پاسخ دهید. تحقیق کنید آیا می توان از پیش لرزه، زلزله اصلی را پیش بینی کرد؟
زلزله ها را برحسب درجات آن ها در بازه های ۳ تایی تقسیم می کنیم. در این بازه ها اولین زلزله و بزرگترین زلزله را به دست می آوریم. سپس توزیع فواصل این ۲ را هم به دست می آوریم تا ببینیم آیا توزیع خوبی دارد یا نه. با کشیدن توزیع آن می بینیم که توزیع جالبی ندارد و حتی یکنوا هم نیست توزیع آن. پس بنابراین نمی توان از روی پیش لرزه پس لرزه را پیش بینی کرد ولی می توان حدودی را داشت.
# Can foreshocks predict the main shock? Bin events into ~1-degree spatial
# cells and coarse time windows, then look at the delay between the first
# event and the largest event in each bin.
ww7 <- ww %>%
  mutate(lat = round(latitude, 0), long = round(longitude, 0)) %>%
  select(time, lat, long, mag) %>%
  arrange(time)
# NOTE(review): days_from_beginning is already in 3-day units (24*60*60*3
# seconds) and is divided by 3 again in group_by(), yielding ~9-day windows —
# confirm this double division is intended.
pre_post <- ww7 %>%
  mutate(days_from_beginning =
           round(as.numeric(time- ww7$time[1])/(24*60*60*3),0)) %>%
  group_by(lat, long, round(days_from_beginning/3, 0)) %>%
  summarise(greatest = max(mag),
            greatest_time= time[which.max(mag)],
            first = min(time),
            count = n()) %>%
  ungroup() %>%
  filter(greatest >= 4.5) %>%      # keep bins whose main shock is notable
  filter(count > 6) %>%            # ...and that have enough events to matter
  mutate(distance = as.numeric(greatest_time - first)) %>%
  select(distance) %>%
  as.data.frame()
# Distribution of first-event-to-main-shock delays.
ggplot(pre_post, aes(x = distance / 60 / 60 )) +
  geom_density() +
  ggtitle("Distance Distribution") +
  xlab("Distance (hours)") +
  ylab("density")
۸. گزاره " آیا شدت زلزله به عمق آن بستگی دارد" را تحقیق کنید؟ (طبیعتا از آزمون فرض باید استفاده کنید.)
از تست کریلیشن اسپیرمن استفاده می کنیم و می بینیم که اندکی تاثیر مثبت دارد.
# Spearman rank correlation between magnitude and depth.
cor(ww$mag,
    ww$depth,
    method = "spearman")
## [1] 0.2344252
# Hypothesis test on the same correlation. The original passed `paired = T`,
# which is not a cor.test() argument (it is silently absorbed by `...`), so
# it is dropped here.
cor.test(ww$mag,
         ww$depth,
         method = "spearman") -> result
result
##
## Spearman's rank correlation rho
##
## data: ww$mag and ww$depth
## S = 2.8439e+13, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.2344252
result$p.value
## [1] 0
# NOTE(review): chisq.test on two continuous variables treats every distinct
# value as a category (df = 4,463,700 below) — its result is not meaningful
# here; kept only because it was in the original analysis.
chisq.test(ww$mag,
           ww$depth)
##
## Pearson's Chi-squared test
##
## data: ww$mag and ww$depth
## X-squared = 3403700, df = 4463700, p-value = 1
# Scatter with a regression line and the Spearman coefficient annotated.
ggpubr::ggscatter(ww, x = "depth", y = "mag",
                  add = "reg.line", conf.int = TRUE,
                  cor.coef = TRUE, cor.method = "spearman",
                  xlab = "Depth", ylab = "Magnitude",color = "mag", alpha = 0.5)+
  guides(color = FALSE)
۹. میانگین سالانه زلزله ها را بر حسب کشور به دست آورید. آیا میتوان دلیلی در تایید یا رد تئوری هارپ ارائه کرد.
ابتدا تعداد زلزله های در هر کشور را با ۲ راه یکی با بررسی ستون مکان و سپس هم با بررسی کشوری که زلزله در محیط بسته ی آن رخ داده اندازه می گیریم.
در نهایت هم می بینیم که با مقایسه ی صفحات تکتونیک زمین و جاهایی که زلزله رخ داده است دلیلی برای قبول کردن تئوری هارپ وجود ندارد و اکثر زلزله ها همانطور که انتظار می رود روی صفحات تکتونیک هستند.
library(sp)
library(rworldmap)
# Length of the observation window in years.
# NOTE(review): as.numeric() on the difftime is used without units = "days",
# relying on the default unit; 365.4 approximates days per year — confirm.
period <- (ww$time %>% max() - ww$time %>% min()) %>% as.numeric() / 365.4
# Method 1: locate each M>4 event inside a country polygon via coords2country.
ww_country <- ww %>% filter(mag > 4)
ww_country$country <-
  ww_country %>%
  select(longitude, latitude) %>%
  coords2country() %>%
  as.character()
# Average yearly count per country, with ISO3 codes for the map join.
countries_year <- ww_country %>%
  group_by(country) %>%
  summarize(mean = round(n()/period, 2)) %>%
  ungroup() %>%
  mutate(code = countrycode(country, "country.name", "iso3c")) %>%
  arrange(desc(mean))
hcmap(
  "custom/world",
  data = countries_year %>% na.omit(),
  value = "mean",
  joinBy = c("iso-a3", "code"),
  name = "Avg. > 4 Richters Earthquakes in a year",
  dataLabels = list(enabled = TRUE, format = '{point.name}'),
  borderColor = "#FAFAFA",
  borderWidth = 0.1
) %>%
  hc_mapNavigation(enabled = TRUE)
# ww_country %>% select(time, mag, place, country) %>% View()
# OTHER METHOD
########################
# Method 2: derive the country from the textual "place" column instead of the
# coordinates. period is recomputed so this chunk can run on its own.
period <- (ww$time %>% max() - ww$time %>% min()) %>% as.numeric() / 365.4
countries_year <- ww %>% filter(mag > 4) %>%
  mutate(country = place2country(place = place)) %>%
  mutate(code = countrycode(country, "country.name", "iso3c")) %>%
  group_by(code) %>%
  summarize(mean = round(n()/period, 2)) %>%
  ungroup() %>%
  arrange(desc(mean)) %>%
  mutate(country = countrycode(code, "iso3c", "country.name"))
hcmap(
  "custom/world",
  data = countries_year %>% na.omit(),
  value = "mean",
  joinBy = c("iso-a3", "code"),
  name = "Avg. > 4 Richters Earthquakes in a year",
  dataLabels = list(enabled = TRUE, format = '{point.name}'),
  borderColor = "#FAFAFA",
  borderWidth = 0.1
) %>%
  hc_mapNavigation(enabled = TRUE)
## HARP
############
# Plot the recorded events on the recentred world map: quakes cluster along
# tectonic plate boundaries, which argues against the HAARP theory.
worldmap <- get_world_map()
# NOTE(review): this draws every row of ww although the title says "> 4.0" —
# confirm whether ww should be filtered on mag > 4 here (title also has the
# typo "Ritchters").
worldmap +
  geom_point(
    data = ww,
    aes(longitude, latitude, color = mag),
    pch = 19,
    size = 1,
    alpha = .4
  ) +
  scale_color_distiller(palette = "Spectral")+
  ggtitle("> 4.0 Ritchters Earthquakes")
۱۰. سه حقیقت جالب در مورد زلزله بیابید.
حقیقت اول: سونامی ها بیشتر از غیر سونامی ها آدم می کشند.
حقیقت دوم: کشنده ترین زمین لرزه های دوقلو، یعنی زمین لرزه های بزرگی که با فواصل کم رخ داده اند کدامند؟
کشور های با جمعیت بیشتر زلزله ی بیشتری دارند.
#### Tsunamis kill more people
##############################
# Fact 1: tsunami-generating earthquakes kill more people.
# Split the catalogue on the tsunami flag, keeping only rows with a known
# death toll. NOTE(review): this assumes NA in FLAG_TSUNAMI means "no
# tsunami" — confirm against the data dictionary.
disaster %>% filter(!is.na(FLAG_TSUNAMI), !is.na(TOTAL_DEATHS)) -> tsu
disaster %>% filter(is.na(FLAG_TSUNAMI), !is.na(TOTAL_DEATHS)) -> ntsu
# Mean death toll of each group.
mean(tsu$TOTAL_DEATHS)## [1] 5348.079
mean(ntsu$TOTAL_DEATHS)## [1] 4169.106
# One-sided Wilcoxon rank-sum test: do tsunami events kill more people?
wilcox.test(tsu$TOTAL_DEATHS, ntsu$TOTAL_DEATHS, alternative = "greater")##
## Wilcoxon rank sum test with continuity correction
##
## data: tsu$TOTAL_DEATHS and ntsu$TOTAL_DEATHS
## W = 317630, p-value = 8.633e-11
## alternative hypothesis: true location shift is greater than 0
#### Greatest Twin Attacks!
#### (Two EQs that occurred together and killed the most)
########################################################
# Pairs of earthquakes in the same country at most one year apart, ranked by
# their combined death toll. (The original sorted by DEATH_TOGETHER twice;
# one arrange() suffices.)
disaster %>%
  select(YEAR, COUNTRY, EQ_PRIMARY, TOTAL_DEATHS) %>%
  filter(!is.na(TOTAL_DEATHS)) %>%
  group_by(COUNTRY) %>%
  arrange(YEAR) %>%
  mutate(PREV_YEAR = lag(YEAR)) %>%
  mutate(PREV_DEATH = lag(TOTAL_DEATHS)) %>%
  mutate(DEATH_TOGETHER = TOTAL_DEATHS + PREV_DEATH)%>%
  mutate(PREV_EQ_PRIMARY = lag(EQ_PRIMARY)) %>%
  filter(YEAR - PREV_YEAR <= 1) %>%
  ungroup() %>%
  select(COUNTRY, YEAR, PREV_YEAR, DEATH_TOGETHER,
         CURR_DEATH = TOTAL_DEATHS, PREV_DEATH,
         CURR_MAG = EQ_PRIMARY, PREV_MAG = PREV_EQ_PRIMARY) %>%
  arrange(desc(DEATH_TOGETHER)) %>%
  head(15) %>%
  hchart(type = "bar",
         hcaes(x = paste(COUNTRY, "(",YEAR, ")") , y = DEATH_TOGETHER),
         name = "Total deaths in two eqs",
         tooltip = list(pointFormat = "Country: {point.COUNTRY} <br/>
EQ1 Year: {point.PREV_YEAR} <br/>
EQ2 Year: {point.YEAR} <br/>
Death Toll: {point.DEATH_TOGETHER} <br/>
EQ1 Death Toll: {point.PREV_DEATH} <br/>
EQ2 Death Toll: {point.CURR_DEATH} <br/>
EQ1 Mag.: {point.PREV_MAG} <br/>
EQ2 Mag: {point.CURR_MAG}")) %>%
  hc_title(text = "Greatest Twin Attacks") %>%
  hc_xAxis(title = list(text = "EQ")) %>%
  hc_yAxis(title = list(text = "Death Toll")) %>%
  hc_add_theme(hc_theme_sandsignika())
#### Countries With Higher EQ rate have higher populations
##########################################################
library(wpp2017)
data(pop)
# Join 2015 populations onto the per-country earthquake rates computed above
# (countries_year), matching on numeric ISO country codes.
population_rate <- inner_join(x = pop %>%
                                select(country_code,population = `2015`),
                              y = countries_year %>%
                                mutate(country_code = countrycode(country, "country.name", "iso3n"))) %>%
  select(country,population, rate = mean)
# Scatter of EQ rate against population (millions).
population_rate %>% mutate(population = round(population / 1000, 2)) %>%
  hchart(hcaes(x= population, y = rate, size = population),name = "EQ Rate / Pop.", type = "scatter",
         tooltip = list(pointFormat = "Country: {point.country} <br/>
Eq Rate: {point.rate} <br/>
Pop.: {point.population} <br/>")) %>%
  hc_title(text = "EQ Rate / Pop.") %>%
  hc_xAxis(title = list(text = "Population (million)")) %>%
  hc_yAxis(title = list(text = "The Same?")) %>%
  hc_add_theme(hc_theme_sandsignika())
# Pie of population living in countries with >= 100 earthquakes per year
# (this statement was fused onto the previous line in the knitted source).
population_rate %>%
  mutate(population = round(population / 1000, 2)) %>%
  group_by(rate >= 100) %>%
  summarize(population = sum(population)) %>%
  ungroup() %>%
  hchart(type = "pie",
         hcaes(
           x = `rate >= 100`,
           y = population,
           name = as.character(`rate >= 100`)
         ),
         name = "Pop. (millions)") %>%
  hc_title(text = "People Living in Countries with >= 100 EQs in a year") %>%
  hc_add_theme(hc_theme_sandsignika())
# One-sided Pearson test: is the correlation between population and EQ rate
# positive? (This statement was also fused onto the previous line.)
cor.test(population_rate$population,
         population_rate$rate,
         method = "pearson",
         alternative = "greater")
##
## Pearson's product-moment correlation
##
## data: population_rate$population and population_rate$rate
## t = 2.4178, df = 135, p-value = 0.008474
## alternative hypothesis: true correlation is greater than 0
## 95 percent confidence interval:
## 0.06443615 1.00000000
## sample estimates:
## cor
## 0.2037283